/*=======================================================================================
	Impute-before-tax-income.do

		Computes houshold before-tax income distribution in Alaska using 5-year
		ACS 2010-2014

	Author: Lorenz Kueng, August 2017
=========================================================================================*/

cap log close PFW_03c
log using "$homedir/log-files/PFW_03c_$date.log", text replace name(PFW_03c)


*=============================================================
* Data 
*=============================================================

use "$homedir/data/stata/PFW_quarterly.dta", clear

	*** Apply same sample selection as in main regression analysis

		** Winsorize nondurables and services at 1%

			cap drop temp
			winsor   nondur_serv, generate(nondur_serv_W) p(0.01) 
			replace  nondur_serv_W = 0 if nondur_serv_W==.

		** Run baseline regression to select same estimation sample 

			reghdfe D.nondur_serv_W APFD ///
				if APFDid_annual!=. & liquid_asset1!=. ///
				, absorb(date Alaska) vce(cluster userid)
			drop nondur_serv_W

			keep if e(sample)

		** Add PFD to regular income
			
			replace  APFD_annual = 0 if state=="WA" // set to zero (before it was imputed based on self-reported family sizes to serve as a 'placebo' dividend)
			replace  APFD_annual = NumbChecks*1884 if year==2014 & state=="AK" // impute unobserved dividend receipts in 2014 as the sample stops before October



*=============================================================
* Create TAXSIM variables 
*=============================================================
	
		* 1. tax year
		* year 
		
		* 2. state (convert FIPS to SOI state codes)
		cap drop state
		generate state =  2 if state_fips == 2  // Alaska
		replace  state = 48 if state_fips == 53 // Washington 

		* 3. marital status (ie taxfiler status)
		cap drop mstat
		generate mstat = 1 if marriagestatus=="S" | marriagestatus=="D" // single filer (S: single, D: divorced)
		replace  mstat = 2 if marriagestatus=="M" | marriagestatus=="P" // joint filer (M: married, P: partners living together)
		replace  mstat = 3 if mstat==1 & adults==1 & children>0  // head of household
		replace  mstat = 1 if missing(mstat)==1 & adults==1 & children==0  
		replace  mstat = 3 if missing(mstat)==1 & adults==1 & children>0 & children!=.  
		replace  mstat = 1 if missing(mstat)==1 & adults==1 
		replace  mstat = 2 if missing(mstat)==1 & adults >1 
		tabmiss  mstat
		tabulate mstat

		* 4. number of dependents
		replace adults   =2 if adults==. // imputation with modal value
		replace children =0 if children==.
		
		generate depx = adults-1 + children 

		* 5. number of taxpayers age>65 (0,1, or 2)
		generate agex = 0 
		replace  agex = 1 if age>65 & adults==1
		replace  agex = 2 if age>65 & adults>=2 & adults!=.

		* 6. wage and salary of taxpayer (incl. self-employment). Note: Will be dropped by TAXSIM if negative.
		generate pwages = incomeY + APFD_annual // income after deduction and tax withholding, inclusive of PFD
		

		* 7. wage and salary of spouse (incl. self-employment). 
		generate swages = 0 // no information

		* 8. dividends. Note: Qualified dividends only from 2003 on. 
		generate dividends = 0 

		* 9. interest and other property income
		generate otherprop =  0

		* 10. taxable pensions
		generate pensions = 0 // pensions, private annuities, annuities from IRA,Keogh

		* 11. gross social security income
		generate gssi = 0  // social security and rail-road retirement income. Note: SOCRRX includes FRRETIRX.

		* 12. other non-taxable transfer income
		generate transfers = 0

		* 13. rent paid
		generate rentpaid = 0 

		* 14. property and other taxes (part of itemized deductions)
		generate proptax = 0 

		* 15. additional personal itemized deductions (except mortgage, state and property tax)
		gen otheritem = 0 
		
		* 16. child care expenses
		gen childcare =  0 
		
		* 17. unemployment compensation
		gen ui = 0 

		* 18. number of dependents under age 17
		gen depchild = children 

		* 19. mortgage interest
		cap drop mortgage
		gen mortgage = 0

		* 20. short-term capital gains
		gen stcg = 0 

		* 21. long-term capital gains
		gen ltcg = 0 


	keep userid year ///
		state mstat depx agex pwages swages dividends otherprop pensions gssi transfers ///
		rentpaid proptax otheritem childcare ui depchild mortgage stcg ltcg	
	
	count
	duplicates drop _all, force
	count
	
	save "$homedir/data/stata/PFW_quarterly_TAXSIM.dta", replace



*=============================================================
* Impute taxes using TAXSIM 
*=============================================================
		
use "$homedir/data/stata/PFW_quarterly_TAXSIM.dta", clear	
	
	** Initial guess of income before taxes
	
		generate income_after_tax_initial = pwages
		generate income_after_tax         = pwages
		
		replace  pwages = 1.3*pwages


	** First run of TAXSIM
	
		cap n net from "http://www.nber.org/stata"
		cap n net describe taxsim9
		cap n net install taxsim9

		di c(current_time)
		taxsim9, replace full
		di c(current_time) // solved in about 20 seconds
		
		** Imputed after-tax income
		replace income_after_tax = pwages - fiitax - siitax
		
		** Difference bewteen observed and imputed after-tax income
		cap drop _gap
		generate _gap = income_after_tax - income_after_tax_initial
		sum _gap, d
		
		** Relative error
		cap drop _epsilon
		generate _epsilon = abs(income_after_tax/income_after_tax_initial-1)
		sum _epsilon, d
		local _epsilon = `r(max)'
	
		** Update pre-tax income guess
		replace pwages = pwages - _gap
	

	** Iterate TAXSIM

	forvalues i=1/20 {
		
		display _n(10)"i=`i': _epsilon=`_epsilon'"
		
		if `_epsilon'>0.01 {
		
			di c(current_time)
			taxsim9, replace full
			di c(current_time)
			
			replace income_after_tax = pwages - fiitax - siitax
			
			cap drop _gap
			generate _gap = income_after_tax - income_after_tax_initial
			sum _gap, d
			
			cap drop _epsilon
			generate _epsilon = abs(income_after_tax/income_after_tax_initial-1)
			sum _epsilon, d
			local _epsilon = `r(max)'
		
			replace pwages = pwages - _gap
		}
	}

	generate incomeY_incl_PFD_before_tax = pwages
	replace  incomeY_incl_PFD_before_tax = income_after_tax_initial if missing(incomeY_incl_PFD_before_tax)==1 // update missing with negative income to match mean of summary stats
	lab var  incomeY_incl_PFD_before_tax "imputed before-tax income using TAXSIM"
	
	generate incomeY_incl_PFD_after_tax  = pwages - fiitax - siitax
	replace  incomeY_incl_PFD_after_tax  = income_after_tax_initial if missing(incomeY_incl_PFD_after_tax)==1 
	lab var  incomeY_incl_PFD_after_tax "imputed after-tax income using TAXSIM (approx = incomeY_incl_PFD)"

	lab var  income_after_tax_initial "original income (incl. dividend) from PFW data"
	
		** Check gap
		cap drop _gap
		generate _gap = incomeY_incl_PFD_after_tax - income_after_tax_initial
		sum _gap, d
		
		** Check relative error
		cap drop _epsilon
		generate _epsilon = abs(incomeY_incl_PFD_after_tax/income_after_tax_initial-1)
		sum _epsilon, d

	
	bysort state: sum incomeY_incl_PFD_before_tax incomeY_incl_PFD_after_tax income_after_tax_initial, d
	
	keep userid year state incomeY_incl_PFD_before_tax  incomeY_incl_PFD_after_tax  income_after_tax_initial
	compress
	label data "Imputed before-tax income recursively using TAXSIM; Impute-before-taxincome.do"
	sort userid year
	cap n saveold "$homedir/data/stata/PFW_quarterly_ImputedIncomeBeforeTaxes.dta", replace
	cap n saveold "$homedir/data/stata/PFW_quarterly_ImputedIncomeBeforeTaxes.dta", replace version(11)

	


*=============================================================
* Summary statistics of before-tax income 
*=============================================================
	
use "$homedir/data/stata/PFW_quarterly_ImputedIncomeBeforeTaxes.dta", clear
	
	** Winsorize nominal variables

		foreach var in  incomeY_incl_PFD_before_tax  incomeY_incl_PFD_after_tax  income_after_tax_initial /// 
					{
			winsor   `var', generate(`var'W) p(0.01)
		}

	
	** Summary statistics
	
		gen Alaska = state==2
			
		bysort Alaska: tabstat incomeY_incl_PFD_before_tax*  incomeY_incl_PFD_after_tax*  income_after_tax_initial*, statistics(N mean median sd) columns(statistics) noseparator varwidth(32)
		
		
	** Compare with ACS household income distribution  for Alaska (before-tax)
	
		count if  Alaska==1
			local Ntotal = `r(N)'
		count if  Alaska==1  &  incomeY_incl_PFD_before_tax<= 33400 									  // 1st ACS income quintile
			local f1 = round(`r(N)'/`Ntotal'*100,0.01)
		count if  Alaska==1  &  incomeY_incl_PFD_before_tax<= 58400 &  incomeY_incl_PFD_before_tax> 33400 // 2nd ACS income quintile
			local f2 = round(`r(N)'/`Ntotal'*100,0.01)
		count if  Alaska==1  &  incomeY_incl_PFD_before_tax<= 86099 &  incomeY_incl_PFD_before_tax> 58400 // 3rd ACS income quintile
			local f3 = round(`r(N)'/`Ntotal'*100,0.01)
		count if  Alaska==1  &  incomeY_incl_PFD_before_tax<=128469 &  incomeY_incl_PFD_before_tax> 86099 // 4th ACS income quintile
			local f4 = round(`r(N)'/`Ntotal'*100,0.01)
		count if  Alaska==1  &  incomeY_incl_PFD_before_tax<.       &  incomeY_incl_PFD_before_tax>128469 // 5th ACS income quintile
			local f5 = round(`r(N)'/`Ntotal'*100,0.01)
					
		display "Fraction of users in 1st ACS before-tax income quintile: `f1'%"	
		display "Fraction of users in 2nd ACS before-tax income quintile: `f2'%"	
		display "Fraction of users in 3rd ACS before-tax income quintile: `f3'%"	
		display "Fraction of users in 4th ACS before-tax income quintile: `f4'%"	
		display "Fraction of users in 5th ACS before-tax income quintile: `f5'%"	
		
		di `f1'+`f2'+`f3'+`f4'+`f5'
	
log close PFW_03c
 
